#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')

library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally) ; library(ggpubr)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr) ; library(kableExtra)
library(biomaRt)
library(clusterProfiler) ; library(ReactomePA) ; library(DOSE) ; library(org.Hs.eg.db)
library(WGCNA)

SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

Load preprocessed dataset (preprocessing code in 01_data_preprocessing.Rmd) and clustering (pipeline in 05_WGCNA.Rmd)

# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame


# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>% 
              mutate('ID'=as.character(ensembl_gene_id)) %>% 
              dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
              mutate('Neuronal'=1)


# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_01-03-2020_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]


# Clusterings
clusterings = read_csv('./../Data/clusters.csv')


# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>% 
             mutate(`gene-score`=ifelse(is.na(`gene-score`), 'Others', `gene-score`)) %>%
             left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
             mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
             mutate(gene.score=ifelse(`gene-score`=='Others' & Neuronal==1, 'Neuronal', `gene-score`), 
                    significant=padj<0.05 & !is.na(padj))

# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
               host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)

genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))


clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]

dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]

load('./../Data/GSEA.RData')
GSEA_SFARI = enrichment_SFARI

load('./../Data/ORA.RData')
ORA_SFARI = enrichment_SFARI


rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds, enrichment_DGN, enrichment_DO, enrichment_GO,
   enrichment_KEGG, enrichment_Reactome, enrichment_SFARI)


Selecting Top Modules


We have results both from GSEA and ORA to measure the enrichment of SFARI Genes in each module, and they both agree with each other relatively well

SFARI_genes_by_module = c()

for(module in names(GSEA_SFARI)){
  
  GSEA_info = GSEA_SFARI[[module]] %>% dplyr::select(ID, pvalue, p.adjust, NES) %>%
              mutate(pvalue = ifelse(NES>0, pvalue, 1-pvalue), 
                     p.adjust = ifelse(NES>0, p.adjust, 1)) %>%
              dplyr::rename('GSEA_pval' = pvalue, 'GSEA_padj'= p.adjust)
  
  ORA_info = ORA_SFARI[[module]] %>% dplyr::select(ID, pvalue, p.adjust, qvalue, GeneRatio, Count) %>%
             dplyr::rename('ORA_pval' = pvalue, 'ORA_padj' = p.adjust)
  
  module_info = GSEA_info %>% full_join(ORA_info, by = 'ID') %>% add_column(.before = 'ID', Module = module)
  
  SFARI_genes_by_module = rbind(SFARI_genes_by_module, module_info)
}

SFARI_genes_by_module = SFARI_genes_by_module %>% 
                        left_join(dataset %>% dplyr::select(Module, MTcor) %>% 
                                  group_by(Module,MTcor) %>% tally %>% ungroup, by = 'Module') %>%
                        mutate(ORA_pval = ifelse(is.na(ORA_pval), 1, ORA_pval),
                               ORA_padj = ifelse(is.na(ORA_padj), 1, ORA_padj))

plot_data = SFARI_genes_by_module %>% filter(ID=='SFARI')

ggplotly(plot_data %>% ggplot(aes(1-GSEA_pval, 1-ORA_pval, size = n)) + 
         geom_point(color = plot_data$Module, alpha = .7, aes(id=Module)) + 
         geom_smooth(se=FALSE, color = '#CCCCCC') + 
         xlab('GSEA Enrichment') + ylab('ORA Enrichment') + coord_fixed() +
         ggtitle(paste0('Corr = ', round(cor(plot_data$GSEA_pval, plot_data$ORA_pval),2))) +
         theme_minimal() + theme(legend.position = 'none'))



To determine which modules have a statistically significant enrichment in SFARI Genes we can use the adjusted p-values. We used the Bonferroni correction for this.

GSEA identifies 15/91 as significant. This doesn’t make sense

ggplotly(plot_data %>% ggplot(aes(GSEA_padj, ORA_padj, size = n)) + 
         geom_point(color = plot_data$Module, alpha = .7, aes(id=Module)) + 
         geom_smooth(se=FALSE, color = '#CCCCCC') + 
         geom_hline(yintercept = 0.01, color = 'gray', linetype = 'dashed') +
         geom_vline(xintercept = 0.01, color = 'gray', linetype = 'dashed') +
         xlab('GSEA adjusted p-value') + ylab('ORA adjusted p-value') + 
         scale_x_log10(limits = c(min(plot_data$GSEA_padj, plot_data$ORA_padj),1.2)) + 
         scale_y_log10(limits = c(min(plot_data$GSEA_padj, plot_data$ORA_padj),1.2)) +
         ggtitle(paste0('Corr = ',round(cor(plot_data$GSEA_padj, plot_data$ORA_padj),2))) + coord_fixed() +
         theme_minimal() + theme(legend.position = 'none'))
plot_data = plot_data %>% mutate(GSEA_sig = GSEA_padj<0.01, ORA_sig = ORA_padj<0.01) %>%
            apply_labels(GSEA_sig = 'GSEA significant enrichment',
                         ORA_sig = 'ORA significant enrichment')

cro(plot_data$GSEA_sig, list(plot_data$ORA_sig, total()))
 ORA significant enrichment     #Total 
 FALSE   TRUE   
 GSEA significant enrichment 
   FALSE  75 1   76
   TRUE  13 2   15
   #Total cases  88 3   91



The ‘over-enrichment’ in SFARI Modules in GSEA could be because SFARI Genes have in general higher Module Memberships than the other genes, which would make them cluster at the beginning of the list constantly and would bias the enrichment analysis.

Looking at the plot below, we can see that there is not a uniform distribution of SFARI genes across all quantiles of the Module Membership values, but they instead seem to cluster around Module Membership values with high magnitudes (both positive and negative), so I don’t think the GSEA results for the SFARI genes are valid.

Because of this, I’m going to use the enrichment from the ORA to study the SFARI Genes

quant_data = dataset %>% dplyr::select(ID, contains('MM.')) %>% 
             left_join(genes_info %>% dplyr::select(ID, gene.score), by = 'ID') %>% dplyr::select(-ID) %>%
             melt %>% mutate(quant = cut(value, breaks = quantile(value, probs = seq(0,1,0.05)) %>% 
                                     as.vector, labels = FALSE)) %>%
             group_by(gene.score, quant) %>% tally %>% ungroup %>% ungroup
  
quant_data = quant_data %>% group_by(quant) %>% summarise(N = sum(n)) %>% ungroup %>% 
            left_join(quant_data, by = 'quant') %>% dplyr::select(quant, gene.score, n, N) %>% 
            mutate(p = round(100*n/N,2)) %>% filter(!is.na(quant)) %>%
            mutate(gene.score = factor(gene.score, levels = rev(c('1','2','3','Neuronal','Others'))))

ggplotly(quant_data %>% filter(!gene.score %in% c('Neuronal','Others')) %>% 
         ggplot(aes(quant, p, fill = gene.score)) + geom_bar(stat='identity') + 
         xlab('Module Membership Quantiles') + ylab('% of SFARI Genes in Quantile') +
         ggtitle('Percentage of Genes labelled as SFARI in each Quantile') +
         scale_fill_manual(values = SFARI_colour_hue(r=rev(c(1:3)))) + 
         theme_minimal() + theme(legend.position = 'none'))
rm(quant_data)


Selecting modules with an adjusted p-value below 0.01 using the ORA

ggplotly(plot_data %>% ggplot(aes(MTcor, ORA_padj, size=n)) + 
         geom_point(color=plot_data$Module, alpha=0.5, aes(id=Module)) +
         geom_hline(yintercept = 0.01, color = 'gray', linetype = 'dotted') + 
         xlab('Module-Diagnosis Correlation') + ylab('Corrected p-values') + scale_y_log10() +
         ggtitle('Modules Significantly Enriched in SFARI Genes') +
         theme_minimal() + theme(legend.position = 'none'))
top_modules = plot_data %>% arrange(desc(ORA_padj)) %>% filter(ORA_padj<0.01) %>% pull(Module) %>% as.character

plot_data %>% filter(Module %in% top_modules) %>% arrange(ORA_pval) %>%
              dplyr::select(Module, MTcor, ORA_pval, ORA_padj, qvalue, GeneRatio, Count) %>%
              rename( ORA_pval = 'p-value', ORA_padj = 'Adjusted p-value') %>%
              kable %>% kable_styling(full_width = F)
Module MTcor p-value Adjusted p-value qvalue GeneRatio Count
#FD61D0 0.1879079 0.0000000 0.0000002 0.0000001 16/56 16
#00B933 0.3460181 0.0001397 0.0006985 0.0002206 54/575 54
#00BBDA -0.4208181 0.0007684 0.0038420 0.0016177 27/247 27
pca = datExpr %>% prcomp

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% 
            left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
            dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
            mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
            mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
                   alpha = ifelse(ImportantModules=='Others', 0.2, 0.4),
                   gene_id = paste0(ID, ' (', external_gene_id, ')')) %>%
            apply_labels(ImportantModules = 'Top Modules')

plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) + 
              geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() +
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
              ggtitle('Genes belonging to the Modules with the highest SFARI Genes Enrichment')

rm(pca)





Identifying representative genes for each Module


Following the WGCNA pipeline, selecting the genes with the highest Module Membership and Gene Significance

Top 20 genes for each module


Ordered by \(\frac{MM+|GS|}{2}\)

There aren’t that many SFARI genes in the top genes of the modules

create_table = function(module){
  top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>% 
              dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
              filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>% 
              mutate(Relevance = (MM+abs(GS))/2) %>% arrange(by=-Relevance) %>% top_n(20) %>%
              dplyr::rename('Gene Symbol' = external_gene_id, 'SFARI Score' = gene.score)
  return(top_genes)
}

top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])

kable(top_genes[[1]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[1], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[1]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #00BBDA (MTcor = -0.42)
Gene Symbol MM GS SFARI Score Relevance
RHOBTB2 0.5492828 -0.6440469 Others 0.5966649
GPR27 0.7092604 -0.4702614 Others 0.5897609
CYTH3 0.7638147 -0.3977080 Others 0.5807613
TTYH3 0.6707149 -0.4758652 Others 0.5732900
SLC6A1 0.6913062 -0.4290180 1 0.5601621
OAT 0.5774140 -0.5207602 Others 0.5490871
ARNT2 0.5590371 -0.5339837 3 0.5465104
YWHAH 0.6923178 -0.3935747 Others 0.5429462
SERINC1 0.6190739 -0.4521134 Others 0.5355937
ACTR3B 0.6413435 -0.4275961 Others 0.5344698
MARCH4 0.4800645 -0.5548974 Others 0.5174810
DNAJC6 0.6409081 -0.3927409 Others 0.5168245
SLC25A4 0.5233769 -0.4903237 Others 0.5068503
WSB2 0.6454349 -0.3416964 Others 0.4935656
PNMA1 0.7262341 -0.2592167 Others 0.4927254
TELO2 0.4377163 -0.5445601 Others 0.4911382
GNB1 0.6428579 -0.3347353 Others 0.4887966
SPRN 0.5276961 -0.4423674 Others 0.4850318
ILF3 0.6767221 -0.2920871 Others 0.4844046
DDX3X 0.5132590 -0.4539177 1 0.4835884
kable(top_genes[[2]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[2], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[2]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #00B933 (MTcor = 0.35)
Gene Symbol MM GS SFARI Score Relevance
SLC4A7 0.8026955 0.6737220 Others 0.7382087
PAK3 0.8356130 0.5818014 Others 0.7087072
TCF4 0.7640326 0.5478270 1 0.6559298
CCDC88A 0.8409032 0.4023141 Others 0.6216086
RSF1 0.9344956 0.2827544 Others 0.6086250
ZBTB20 0.6820465 0.4960797 1 0.5890631
SMC3 0.8615803 0.3105801 3 0.5860802
PCDH7 0.7962620 0.3753828 Others 0.5858224
UBXN4 0.7543204 0.4164262 Others 0.5853733
ITSN2 0.7955002 0.3693109 Others 0.5824055
KMT2E 0.8167611 0.3444643 2 0.5806127
PPP1R12A 0.8137440 0.3437888 Others 0.5787664
MTDH 0.6799727 0.4768581 Others 0.5784154
ZCRB1 0.6517623 0.5046602 Others 0.5782113
MECP2 0.6372228 0.5160819 1 0.5766523
ATRX 0.8122093 0.3208853 1 0.5665473
DDX21 0.6730998 0.4597490 Others 0.5664244
SRSF11 0.7288430 0.4026356 2 0.5657393
SREK1 0.7476646 0.3806850 Others 0.5641748
KDM5A 0.8216399 0.3055697 Others 0.5636048
kable(top_genes[[3]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[3], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[3]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #FD61D0 (MTcor = 0.19)
Gene Symbol MM GS SFARI Score Relevance
MYCBP2 0.8196944 0.2898447 Others 0.5547696
DENND5B 0.6693310 0.3577416 Others 0.5135363
RIMS2 0.7596851 0.2572706 Others 0.5084779
ARHGAP32 0.7650873 0.2334209 3 0.4992541
MEF2C 0.8236810 0.1712714 3 0.4974762
MYO9A 0.4798712 0.5049781 Others 0.4924246
FZD3 0.6064150 0.3588440 Others 0.4826295
CDKL5 0.6059903 0.2699966 1 0.4379934
BICD1 0.6759105 0.1920989 Others 0.4340047
NRXN1 0.7912961 0.0742731 1 0.4327846
TTBK2 0.6799173 0.1518390 Others 0.4158782
GABRB2 0.7448393 0.0771744 Others 0.4110069
SCN2A 0.7331332 0.0740752 1 0.4036042
NRXN3 0.5854827 0.2183796 1 0.4019312
UBR5 0.5687603 0.2312289 2 0.3999946
RYR2 0.6766000 0.1172778 Others 0.3969389
SYT1 0.6696169 -0.1157074 Others 0.3926622
TSHZ3 0.5527810 0.2308938 1 0.3918374
GABRA1 0.7207382 0.0176543 Others 0.3691963
PLCB1 0.6967621 -0.0130355 2 0.3548988
rm(create_table, i)

The top genes tend to have a relatively high mean level of expression

pca = datExpr %>% prcomp

ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
            mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
            mutate(alpha = ifelse(color %in% top_modules & ID %in% ids, 1, 0.1))

plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) + 
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
              theme_minimal() + ggtitle('Most relevant genes for top Modules')

rm(ids, pca, tg, plot_data)




Enrichment Analysis


Using the package clusterProfiler. Performing Gene Set Enrichment Analysis (GSEA) and Over Representation Analysis (ORA) using the following datasets:

  • Gene Ontology

  • Disease Ontology

  • Disease Gene Network

  • KEGG

  • REACTOME

# GSEA
load('./../Data/GSEA.RData')

# Rename lists
GSEA_GO = enrichment_GO
GSEA_DGN = enrichment_DGN
GSEA_DO = enrichment_DO
GSEA_KEGG = enrichment_KEGG
GSEA_Reactome = enrichment_Reactome
GSEA_SFARI = enrichment_SFARI


# ORA
load('./../Data/ORA.RData')

# Rename lists
ORA_GO = enrichment_GO
ORA_DGN = enrichment_DGN
ORA_DO = enrichment_DO
ORA_KEGG = enrichment_KEGG
ORA_Reactome = enrichment_Reactome
ORA_SFARI = enrichment_SFARI

rm(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome)
compare_methods = function(GSEA_list, ORA_list){
  
  for(top_module in top_modules){
  
    cat(paste0('  \n  \nEnrichments for Module ', top_module, ' (MTcor=',
               round(dataset$MTcor[dataset$Module==top_module][1],2), '):  \n  \n'))
    
    GSEA = GSEA_list[[top_module]]
    ORA = ORA_list[[top_module]]
    
    cat(paste0('GSEA has ', nrow(GSEA), ' enriched terms  \n'))
    cat(paste0('ORA has  ', nrow(ORA), ' enriched terms  \n'))
    cat(paste0(sum(ORA$ID %in% GSEA$ID), ' terms are enriched in both methods  \n  \n'))

    plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
                inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>% 
                           dplyr::select(ID, pval_ORA, GeneRatio, qvalue), by = 'ID') 
    
    if(nrow(plot_data)>0){
      print(plot_data %>% mutate(pval_mean = pval_ORA + pval_GSEA) %>% 
                          arrange(pval_mean) %>% dplyr::select(-pval_mean) %>% 
            kable %>% kable_styling(full_width = F))
    }
  } 
}


plot_results = function(GSEA_list, ORA_list){
  
  l = htmltools::tagList()

  for(i in 1:length(top_modules)){
    
    GSEA = GSEA_list[[top_modules[i]]]
    ORA = ORA_list[[top_modules[i]]]
    
    plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
                inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>% dplyr::select(ID, pval_ORA), by = 'ID')
    
    if(nrow(plot_data)>5){
      min_val = min(min(plot_data$pval_GSEA), min(plot_data$pval_ORA))
      max_val = max(max(max(plot_data$pval_GSEA), max(plot_data$pval_ORA)),0.05)
      ggp = ggplotly(plot_data %>% ggplot(aes(pval_GSEA, pval_ORA, color = NES)) + 
                     geom_point(aes(id = Description)) + 
                     geom_vline(xintercept = 0.05, color = 'gray', linetype = 'dotted') + 
                     geom_hline(yintercept = 0.05, color = 'gray', linetype = 'dotted') + 
                     ggtitle(paste0('Enriched terms in common for Module ', top_modules[i])) +
                     scale_x_continuous(limits = c(min_val, max_val)) + 
                     scale_y_continuous(limits = c(min_val, max_val)) + 
                     xlab('Corrected p-value for GSEA') + ylab('Corrected p-value for ORA') +
                     scale_colour_viridis(direction = -1) + theme_minimal() + coord_fixed())
      l[[i]] = ggp
    }
  }
  
  return(l)
}


KEGG

compare_methods(GSEA_KEGG, ORA_KEGG)

Enrichments for Module #00BBDA (MTcor=-0.42):

GSEA has 8 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00B933 (MTcor=0.35):

GSEA has 2 enriched terms
ORA has 5 enriched terms
2 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
hsa03008 Ribosome biogenesis in eukaryotes 2.067246 0.0185047 0.0128670 9/200 0.0035037
hsa05322 Systemic lupus erythematosus 2.014849 0.0394820 0.0000999 14/200 0.0000999

Enrichments for Module #FD61D0 (MTcor=0.19):

GSEA has 10 enriched terms
ORA has 2 enriched terms
0 terms are enriched in both methods


Reactome

compare_methods(GSEA_Reactome, ORA_Reactome)

Enrichments for Module #00BBDA (MTcor=-0.42):

GSEA has 28 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00B933 (MTcor=0.35):

GSEA has 54 enriched terms
ORA has 81 enriched terms
50 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
R-HSA-1640170 Cell Cycle 1.721185 0.0113650 0.0000001 52/321 0.0000000
R-HSA-3247509 Chromatin modifying enzymes 1.947809 0.0127978 0.0000000 33/321 0.0000000
R-HSA-4839726 Chromatin organization 1.947809 0.0127978 0.0000000 33/321 0.0000000
R-HSA-3108232 SUMO E3 ligases SUMOylate target proteins 1.922789 0.0133653 0.0000000 29/321 0.0000000
R-HSA-212165 Epigenetic regulation of gene expression 2.031024 0.0141667 0.0000000 23/321 0.0000000
R-HSA-211000 Gene Silencing by RNA 2.160193 0.0145551 0.0000011 19/321 0.0000000
R-HSA-2559580 Oxidative Stress Induced Senescence 2.143811 0.0147222 0.0000000 20/321 0.0000000
R-HSA-5250913 Positive epigenetic regulation of rRNA expression 2.225877 0.0148508 0.0000000 20/321 0.0000000
R-HSA-5578749 Transcriptional regulation by small RNAs 2.130850 0.0149524 0.0000010 17/321 0.0000000
R-HSA-3214815 HDACs deacetylate histones 2.192155 0.0151443 0.0000000 19/321 0.0000000
R-HSA-8936459 RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function 2.266836 0.0151443 0.0000002 17/321 0.0000000
R-HSA-3214858 RMTs methylate histone arginines 2.156240 0.0151912 0.0000010 16/321 0.0000000
R-HSA-3214841 PKMTs methylate histone lysines 2.265960 0.0152410 0.0000000 20/321 0.0000000
R-HSA-5250924 B-WICH complex positively regulates rRNA expression 2.229208 0.0152410 0.0000001 17/321 0.0000000
R-HSA-1221632 Meiotic synapsis 2.103650 0.0157116 0.0000000 17/321 0.0000000
R-HSA-427389 ERCC6 (CSB) and EHMT2 (G9a) positively regulate rRNA expression 2.138078 0.0157659 0.0000000 17/321 0.0000000
R-HSA-5693571 Nonhomologous End-Joining (NHEJ) 2.312576 0.0157659 0.0000000 16/321 0.0000000
R-HSA-2299718 Condensation of Prophase Chromosomes 2.371031 0.0158198 0.0000000 16/321 0.0000000
R-HSA-606279 Deposition of new CENPA-containing nucleosomes at the centromere 2.415026 0.0158902 0.0000000 16/321 0.0000000
R-HSA-774815 Nucleosome assembly 2.415026 0.0158902 0.0000000 16/321 0.0000000
R-HSA-912446 Meiotic recombination 2.163160 0.0158902 0.0000000 15/321 0.0000000
R-HSA-212300 PRC2 methylates histones and DNA 2.163053 0.0159782 0.0000000 15/321 0.0000000
R-HSA-110328 Recognition and association of DNA glycosylase with site containing an affected pyrimidine 2.175060 0.0162342 0.0000000 15/321 0.0000000
R-HSA-110329 Cleavage of the damaged pyrimidine 2.175060 0.0162342 0.0000000 15/321 0.0000000
R-HSA-73928 Depyrimidination 2.175060 0.0162342 0.0000000 15/321 0.0000000
R-HSA-73929 Base-Excision Repair, AP Site Formation 2.175060 0.0162342 0.0000000 15/321 0.0000000
R-HSA-5625886 Activated PKN1 stimulates transcription of AR (androgen receptor) regulated genes KLK2 and KLK3 2.255317 0.0162342 0.0000000 14/321 0.0000000
R-HSA-427359 SIRT1 negatively regulates rRNA expression 2.197242 0.0163913 0.0000000 14/321 0.0000000
R-HSA-5334118 DNA methylation 2.217968 0.0163913 0.0000000 14/321 0.0000000
R-HSA-73728 RNA Polymerase I Promoter Opening 2.269001 0.0163913 0.0000000 14/321 0.0000000
R-HSA-110330 Recognition and association of DNA glycosylase with site containing an affected purine 2.373844 0.0164526 0.0000000 15/321 0.0000000
R-HSA-110331 Cleavage of the damaged purine 2.373844 0.0164526 0.0000000 15/321 0.0000000
R-HSA-171306 Packaging Of Telomere Ends 2.373844 0.0164526 0.0000000 15/321 0.0000000
R-HSA-73927 Depurination 2.373844 0.0164526 0.0000000 15/321 0.0000000
R-HSA-2990846 SUMOylation 1.873891 0.0265519 0.0000000 29/321 0.0000000
R-HSA-5620912 Anchoring of the basal body to the plasma membrane 2.162389 0.0145120 0.0122982 14/321 0.0001410
R-HSA-5625740 RHO GTPases activate PKNs 2.123409 0.0302885 0.0000016 16/321 0.0000000
R-HSA-5693565 Recruitment and ATM-mediated phosphorylation of repair and signaling proteins at DNA double strand breaks 2.120408 0.0311157 0.0000000 18/321 0.0000000
R-HSA-5693606 DNA Double Strand Break Response 2.120408 0.0311157 0.0000000 18/321 0.0000000
R-HSA-68886 M Phase 1.752112 0.0363589 0.0000162 36/321 0.0000002
R-HSA-5617833 Cilium Assembly 1.910764 0.0397134 0.0004538 23/321 0.0000055
R-HSA-8953854 Metabolism of RNA 1.723945 0.0109490 0.0456673 48/321 0.0004968
R-HSA-72203 Processing of Capped Intron-Containing Pre-mRNA 1.783812 0.0503212 0.0069476 26/321 0.0000808
R-HSA-1500620 Meiosis 2.062276 0.0607647 0.0000000 18/321 0.0000000
R-HSA-1912408 Pre-NOTCH Transcription and Translation 2.039518 0.0607647 0.0000010 16/321 0.0000000
R-HSA-3214842 HDMs demethylate histones 2.084814 0.0642100 0.0000000 16/321 0.0000000
R-HSA-9018519 Estrogen-dependent gene expression 1.882479 0.0697179 0.0019346 18/321 0.0000228
R-HSA-1474165 Reproduction 2.009032 0.0903052 0.0000000 18/321 0.0000000
R-HSA-69278 Cell Cycle, Mitotic 1.636632 0.0929054 0.0000637 42/321 0.0000008
R-HSA-1852241 Organelle biogenesis and maintenance 1.723768 0.0493833 0.0610793 26/321 0.0006560

Enrichments for Module #FD61D0 (MTcor=0.19):

GSEA has 38 enriched terms
ORA has 2 enriched terms
1 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
R-HSA-112316 Neuronal System 1.817456 0.0108269 0.0779653 8/36 0.0365206


Plots of the results when there are more than 5 terms in common between methods:

plot_results(GSEA_Reactome, ORA_Reactome)


Gene Ontology

compare_methods(GSEA_GO, ORA_GO)

Enrichments for Module #00BBDA (MTcor=-0.42):

GSEA has 26 enriched terms
ORA has 3 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00B933 (MTcor=0.35):

GSEA has 71 enriched terms
ORA has 107 enriched terms
53 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
GO:0051276 chromosome organization 1.910729 0.0475851 0.0000000 86/488 0.0000000
GO:0006396 RNA processing 2.083744 0.0477177 0.0001086 68/488 0.0000025
GO:0006259 DNA metabolic process 1.688344 0.0484306 0.0000104 65/488 0.0000004
GO:0006974 cellular response to DNA damage stimulus 1.626043 0.0488029 0.0001675 59/488 0.0000035
GO:0006325 chromatin organization 2.092795 0.0497765 0.0000000 68/488 0.0000000
GO:0007017 microtubule-based process 1.850879 0.0504519 0.0000341 51/488 0.0000010
GO:0070925 organelle assembly 1.753699 0.0498979 0.0013949 50/488 0.0000261
GO:0006397 mRNA processing 2.092783 0.0513932 0.0002556 45/488 0.0000051
GO:0008380 RNA splicing 2.053987 0.0523666 0.0005267 41/488 0.0000102
GO:0044782 cilium organization 1.962341 0.0555675 0.0001139 33/488 0.0000025
GO:0060271 cilium assembly 1.998239 0.0558887 0.0000434 33/488 0.0000012
GO:0000226 microtubule cytoskeleton organization 1.869246 0.0533070 0.0042517 36/488 0.0000724
GO:0071103 DNA conformation change 1.971281 0.0586526 0.0000000 32/488 0.0000000
GO:0031503 protein-containing complex localization 1.935758 0.0578412 0.0020103 26/488 0.0000361
GO:0071824 protein-DNA complex subunit organization 2.027556 0.0598072 0.0000766 25/488 0.0000019
GO:0006338 chromatin remodeling 2.382088 0.0606265 0.0000000 29/488 0.0000000
GO:0006333 chromatin assembly or disassembly 2.198934 0.0615305 0.0000009 25/488 0.0000000
GO:0006323 DNA packaging 2.073797 0.0618323 0.0000004 25/488 0.0000000
GO:0034728 nucleosome organization 2.216471 0.0627335 0.0000017 23/488 0.0000001
GO:0031497 chromatin assembly 2.085017 0.0636927 0.0000008 22/488 0.0000000
GO:0060147 regulation of posttranscriptional gene silencing 2.027969 0.0648657 0.0000940 18/488 0.0000022
GO:0060964 regulation of gene silencing by miRNA 2.027969 0.0648657 0.0000940 18/488 0.0000022
GO:0060966 regulation of gene silencing by RNA 2.027969 0.0648657 0.0000940 18/488 0.0000022
GO:0006334 nucleosome assembly 2.080711 0.0650384 0.0000013 20/488 0.0000000
GO:0045638 negative regulation of myeloid cell differentiation 2.150275 0.0653078 0.0002379 17/488 0.0000048
GO:0030219 megakaryocyte differentiation 2.116928 0.0657991 0.0000120 18/488 0.0000004
GO:0043044 ATP-dependent chromatin remodeling 2.399869 0.0667354 0.0000000 22/488 0.0000000
GO:0000726 non-recombinational repair 2.103023 0.0668988 0.0000762 16/488 0.0000019
GO:0045652 regulation of megakaryocyte differentiation 2.192708 0.0672609 0.0000004 18/488 0.0000000
GO:0006303 double-strand break repair via nonhomologous end joining 2.133881 0.0675149 0.0000231 16/488 0.0000007
GO:0006336 DNA replication-independent nucleosome assembly 2.285467 0.0701684 0.0000000 17/488 0.0000000
GO:0034724 DNA replication-independent nucleosome organization 2.285467 0.0701684 0.0000000 17/488 0.0000000
GO:0034508 centromere complex assembly 2.319020 0.0701684 0.0000001 16/488 0.0000000
GO:0043486 histone exchange 2.424400 0.0709940 0.0000000 16/488 0.0000000
GO:0031055 chromatin remodeling at centromere 2.323129 0.0712170 0.0000000 16/488 0.0000000
GO:0016233 telomere capping 2.163511 0.0714263 0.0000000 16/488 0.0000000
GO:0034080 CENP-A containing nucleosome assembly 2.437971 0.0723020 0.0000000 16/488 0.0000000
GO:0061641 CENP-A containing chromatin organization 2.437971 0.0723020 0.0000000 16/488 0.0000000
GO:0000183 chromatin silencing at rDNA 2.227051 0.0733032 0.0000000 15/488 0.0000000
GO:0006335 DNA replication-dependent nucleosome assembly 2.284645 0.0745075 0.0000000 14/488 0.0000000
GO:0034723 DNA replication-dependent nucleosome organization 2.284645 0.0745075 0.0000000 14/488 0.0000000
GO:0034968 histone lysine methylation 2.230454 0.0652216 0.0110059 15/488 0.0001748
GO:0120031 plasma membrane bounded cell projection assembly 1.759826 0.0518964 0.0370942 38/488 0.0005175
GO:0000375 RNA splicing, via transesterification reactions 1.971013 0.0542972 0.0374651 31/488 0.0005175
GO:0030031 cell projection assembly 1.747850 0.0517846 0.0487325 38/488 0.0006362
GO:0018023 peptidyl-lysine trimethylation 2.209662 0.0725920 0.0320544 9/488 0.0004559
GO:0018022 peptidyl-lysine methylation 2.056097 0.0641787 0.0453486 15/488 0.0006003
GO:0006406 mRNA export from nucleus 2.125568 0.0653078 0.0444143 14/488 0.0005962
GO:0071427 mRNA-containing ribonucleoprotein complex export from nucleus 2.125568 0.0653078 0.0444143 14/488 0.0005962
GO:0006405 RNA export from nucleus 2.140491 0.0641198 0.0562618 15/488 0.0007149
GO:0000377 RNA splicing, via transesterification reactions with bulged adenosine as nucleophile 1.951349 0.0544377 0.0729604 30/488 0.0008914
GO:0000398 mRNA splicing, via spliceosome 1.951349 0.0544377 0.0729604 30/488 0.0008914
GO:0097711 ciliary basal body-plasma membrane docking 2.158110 0.0650384 0.0704318 14/488 0.0008832

Enrichments for Module #FD61D0 (MTcor=0.19):

GSEA has 42 enriched terms
ORA has 23 enriched terms
1 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
GO:0050877 nervous system process 1.6683 0.0460065 0.0002512 15/52 0.000215


Plots of the results when there are more than 5 terms in common between methods:

plot_results(GSEA_GO, ORA_GO)


Disease Ontology

compare_methods(GSEA_DO, ORA_DO)

Enrichments for Module #00BBDA (MTcor=-0.42):

GSEA has 0 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00B933 (MTcor=0.35):

GSEA has 1 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #FD61D0 (MTcor=0.19):

GSEA has 7 enriched terms
ORA has 5 enriched terms
3 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
DOID:0060040 pervasive developmental disorder 2.190339 0.0072451 0.0112162 7/33 0.0036588
DOID:0060041 autism spectrum disorder 2.154444 0.0073129 0.0706203 6/33 0.0138221
DOID:12849 autistic disorder 2.154444 0.0073129 0.0706203 6/33 0.0138221


Disease Gene Network

compare_methods(GSEA_DGN, ORA_DGN)

Enrichments for Module #00BBDA (MTcor=-0.42):

GSEA has 0 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00B933 (MTcor=0.35):

GSEA has 0 enriched terms
ORA has 6 enriched terms
0 terms are enriched in both methods

Enrichments for Module #FD61D0 (MTcor=0.19):

GSEA has 15 enriched terms
ORA has 13 enriched terms
3 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
umls:C0014544 Epilepsy 1.780997 0.0234847 0.0006046 13/50 0.0001374
umls:C0004352 Autistic Disorder 1.875717 0.0234003 0.0063712 12/50 0.0011580
umls:C1096063 Drug Resistant Epilepsy 2.174315 0.0324183 0.0000664 6/50 0.0000603



Session info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] WGCNA_1.69             fastcluster_1.1.25     dynamicTreeCut_1.63-1 
##  [4] org.Hs.eg.db_3.8.2     AnnotationDbi_1.46.1   IRanges_2.18.3        
##  [7] S4Vectors_0.22.1       Biobase_2.44.0         BiocGenerics_0.30.0   
## [10] DOSE_3.10.2            ReactomePA_1.28.0      clusterProfiler_3.12.0
## [13] biomaRt_2.40.5         kableExtra_1.1.0       knitr_1.28            
## [16] doParallel_1.0.15      iterators_1.0.12       foreach_1.5.0         
## [19] polycor_0.7-10         expss_0.10.2           ggpubr_0.2.5          
## [22] magrittr_1.5           GGally_1.5.0           gridExtra_2.3         
## [25] viridis_0.5.1          viridisLite_0.3.0      RColorBrewer_1.1-2    
## [28] dendextend_1.13.4      plotly_4.9.2           glue_1.4.1            
## [31] reshape2_1.4.4         forcats_0.5.0          stringr_1.4.0         
## [34] dplyr_1.0.0            purrr_0.3.4            readr_1.3.1           
## [37] tidyr_1.1.0            tibble_3.0.1           ggplot2_3.3.2         
## [40] tidyverse_1.3.0       
## 
## loaded via a namespace (and not attached):
##   [1] tidyselect_1.1.0            RSQLite_2.2.0              
##   [3] htmlwidgets_1.5.1           grid_3.6.3                 
##   [5] BiocParallel_1.18.1         munsell_0.5.0              
##   [7] codetools_0.2-16            preprocessCore_1.46.0      
##   [9] withr_2.2.0                 colorspace_1.4-1           
##  [11] GOSemSim_2.10.0             highr_0.8                  
##  [13] rstudioapi_0.11             ggsignif_0.6.0             
##  [15] labeling_0.3                urltools_1.7.3             
##  [17] GenomeInfoDbData_1.2.1      polyclip_1.10-0            
##  [19] bit64_0.9-7                 farver_2.0.3               
##  [21] vctrs_0.3.1                 generics_0.0.2             
##  [23] xfun_0.12                   R6_2.4.1                   
##  [25] GenomeInfoDb_1.20.0         graphlayouts_0.7.0         
##  [27] locfit_1.5-9.4              DelayedArray_0.10.0        
##  [29] bitops_1.0-6                reshape_0.8.8              
##  [31] fgsea_1.10.1                gridGraphics_0.5-0         
##  [33] assertthat_0.2.1            scales_1.1.1               
##  [35] ggraph_2.0.3                nnet_7.3-14                
##  [37] enrichplot_1.4.0            gtable_0.3.0               
##  [39] tidygraph_1.2.0             rlang_0.4.6                
##  [41] genefilter_1.66.0           splines_3.6.3              
##  [43] lazyeval_0.2.2              acepack_1.4.1              
##  [45] impute_1.58.0               broom_0.5.5                
##  [47] europepmc_0.4               checkmate_2.0.0            
##  [49] BiocManager_1.30.10         yaml_2.2.1                 
##  [51] modelr_0.1.6                crosstalk_1.1.0.1          
##  [53] backports_1.1.8             qvalue_2.16.0              
##  [55] Hmisc_4.4-0                 tools_3.6.3                
##  [57] ggplotify_0.0.5             ellipsis_0.3.1             
##  [59] ggridges_0.5.2              Rcpp_1.0.4.6               
##  [61] plyr_1.8.6                  base64enc_0.1-3            
##  [63] progress_1.2.2              zlibbioc_1.30.0            
##  [65] RCurl_1.98-1.2              prettyunits_1.1.1          
##  [67] rpart_4.1-15                cowplot_1.0.0              
##  [69] SummarizedExperiment_1.14.1 haven_2.2.0                
##  [71] ggrepel_0.8.2               cluster_2.1.0              
##  [73] fs_1.4.0                    data.table_1.12.8          
##  [75] DO.db_2.9                   triebeard_0.3.0            
##  [77] reprex_0.3.0                reactome.db_1.68.0         
##  [79] matrixStats_0.56.0          xtable_1.8-4               
##  [81] hms_0.5.3                   evaluate_0.14              
##  [83] XML_3.99-0.3                jpeg_0.1-8.1               
##  [85] readxl_1.3.1                compiler_3.6.3             
##  [87] crayon_1.3.4                htmltools_0.4.0            
##  [89] mgcv_1.8-31                 Formula_1.2-3              
##  [91] geneplotter_1.62.0          lubridate_1.7.4            
##  [93] DBI_1.1.0                   tweenr_1.0.1               
##  [95] dbplyr_1.4.2                MASS_7.3-51.6              
##  [97] rappdirs_0.3.1              Matrix_1.2-18              
##  [99] cli_2.0.2                   igraph_1.2.5               
## [101] GenomicRanges_1.36.1        pkgconfig_2.0.3            
## [103] rvcheck_0.1.8               foreign_0.8-76             
## [105] xml2_1.2.5                  annotate_1.62.0            
## [107] webshot_0.5.2               XVector_0.24.0             
## [109] rvest_0.3.5                 digest_0.6.25              
## [111] graph_1.62.0                rmarkdown_2.1              
## [113] cellranger_1.1.0            fastmatch_1.1-0            
## [115] htmlTable_1.13.3            curl_4.3                   
## [117] graphite_1.30.0             lifecycle_0.2.0            
## [119] nlme_3.1-147                jsonlite_1.7.0             
## [121] fansi_0.4.1                 pillar_1.4.4               
## [123] lattice_0.20-41             httr_1.4.1                 
## [125] survival_3.1-12             GO.db_3.8.2                
## [127] UpSetR_1.4.0                png_0.1-7                  
## [129] bit_1.1-15.2                ggforce_0.3.1              
## [131] stringi_1.4.6               blob_1.2.1                 
## [133] DESeq2_1.24.0               latticeExtra_0.6-29        
## [135] memoise_1.1.0